clear all
set more off, perm
set mem 10000000
set matsize 10000
version 13

************************************************************* 
*** DD regressions: first-stage VD electricity variables ****
************************************************************* 

** Set file paths
do "$path_code/paths.do"

** Set graph scheme
cd "$path/code/analyze"
set scheme fb, perm

********************************************************************************
********************************************************************************

** 1. Run difference in discontinuities for VD elec variables
{
use "$panel/panel_dataset_dd.dta", clear
keep if vplan4<11 & corr_state==1 & sample==1 & sample_h==1 & sing_h==1 & pop_non_zero==1 
drop pct_06-vd_ame_d_spcl
drop work_main_ag_p-pct_irr_twell pct_irr_irr_twell
drop p_300_X-band300_p_200
drop bin25_p-within_border_100k
drop vplan vplan2 vplan3 vdpr vdpr2 vdpr3
drop treat_x_post dd_* stateyear
drop lights_mean_hat lights_max_hat
drop no_hh tot_p tot_m tot_f vd01_id conc_id v_id_hpca11 vd11_id c_code01 v_id_master ///
	h?_id h?v_id f_area totp_h? count_h? maxp_h? grad_max grad_mean

merge 1:1 pca01_id st_code dt_code vplan4 vdpr4 year using ///
	"$panel/panel_dataset_lights_allyears.dta", keep(3) nogen
egen temp1 = max(lights_max*(year==2001)), by(pca01_id)
egen temp2 = max(lights_max*(year==2011)), by(pca01_id)
gen lights_diff = abs(temp2-temp1)
assert lights_diff!=.
	
egen stdtbk = group(stdt bk_code)
gen X = (tot_p01-300)
assert X!=.
gen D = X>=0
gen DX = D*X

gen aw150 = abs(150-abs(300-tot_p01))
gen aw100 = abs(100-abs(300-tot_p01))
	
preserve
use "$panel/panel_dataset_full.dta", clear
egen temp1 = mean(vdp_pwr_h_all_avg) if vdp_pwr_h_all_avg_11>0 & vdp_pwr_h_all_avg_11!=., by(st_code dt_code)
egen HRS_all_wide = mode(temp1), by(st_code dt_code)
egen temp2 = mean(vdp_pwr_h_dom_avg) if vdp_pwr_h_dom_avg_11>0 & vdp_pwr_h_dom_avg_11!=., by(st_code dt_code)
egen HRS_dom_wide = mode(temp2), by(st_code dt_code)
gen HRS_all = HRS_all_wide>=10 & HRS_all_wide!=.
gen HRS_dom = HRS_dom_wide>=10 & HRS_dom_wide!=.
replace HRS_all = . if HRS_all_wide==.
replace HRS_dom = . if HRS_dom_wide==.
keep st_code dt_code HRS_all HRS_dom
duplicates drop
tempfile hrs
save `hrs'
restore
merge m:1 st_code dt_code using `hrs', nogen

cap drop H
gen H = HRS_all


gen yvar = ""
gen fes = ""
gen vce = ""
gen bw = .
gen beta = .
gen se = .
gen tscore = .
gen pvalue = .
gen ci95_lo = .
gen ci95_hi = .
gen beta_hi = .
gen se_hi = .
gen tscore_hi = .
gen pvalue_hi = .
gen ci95_lo_hi = .
gen ci95_hi_hi = .
gen beta_lo = .
gen se_lo = .
gen tscore_lo = .
gen pvalue_lo = .
gen ci95_lo_lo = .
gen ci95_hi_lo = .
gen pval_equal = .
gen ymean = .
gen nobs = .
gen nclust = .
gen rmse = .
	
	// District FEs, 100 bw
local yvar = "vd_pwr_d_all"
local fes = "pca01_id year#stdt"
local vce = "cluster stdtbk"
local bw = 100
local row = 1

reghdfe `yvar' /// 1.D#2001.year ///
	1.D#2011.year c.X#i.year c.DX#i.year if inrange(tot_p01,300-`bw',300+`bw') & ///
	pop_mismatch20==0 & lights_diff<20 [aw=aw`bw'], a(`fes') vce(`vce') 
	
	replace yvar = "`yvar'" in `row'
	replace fes = "`fes'" in `row'
	replace vce = "`vce'" in `row'
	replace bw = `bw' in `row'
	replace beta = _b[1.D#2011.year] in `row'
	replace se = _se[1.D#2011.year] in `row'	
	replace tscore = beta/se in `row'
	replace pvalue = 2*ttail(e(df_r),abs(_b[1.D#2011.year]/_se[1.D#2011.year])) in `row'
	replace ci95_lo = beta - se*invttail(e(df_r),0.025) in `row'
	replace ci95_hi = beta + se*invttail(e(df_r),0.025) in `row'
	sum `yvar' if e(sample)							
	replace ymean = r(mean)	in `row'
	replace nobs = e(N)	in `row'
	replace nclust = e(N_clust) in `row'
	replace rmse = e(rmse) in `row'
	
	
	// District FEs, 150 bw
local yvar = "vd_pwr_d_all"
local fes = "pca01_id year#stdt"
local vce = "cluster stdtbk"
local bw = 150
local row = 2

reghdfe `yvar' /// 1.D#2001.year ///
	1.D#2011.year c.X#i.year c.DX#i.year if inrange(tot_p01,300-`bw',300+`bw') & ///
	pop_mismatch20==0 & lights_diff<20 [aw=aw`bw'], a(`fes') vce(`vce') 
	
	replace yvar = "`yvar'" in `row'
	replace fes = "`fes'" in `row'
	replace vce = "`vce'" in `row'
	replace bw = `bw' in `row'
	replace beta = _b[1.D#2011.year] in `row'
	replace se = _se[1.D#2011.year] in `row'	
	replace tscore = beta/se in `row'
	replace pvalue = 2*ttail(e(df_r),abs(_b[1.D#2011.year]/_se[1.D#2011.year])) in `row'
	replace ci95_lo = beta - se*invttail(e(df_r),0.025) in `row'
	replace ci95_hi = beta + se*invttail(e(df_r),0.025) in `row'
	sum `yvar' if e(sample)							
	replace ymean = r(mean)	in `row'
	replace nobs = e(N)	in `row'
	replace nclust = e(N_clust) in `row'
	replace rmse = e(rmse) in `row'
	

	// District FEs, 100 bw, x power supply +/- 10 hours/day
local yvar = "vd_pwr_d_all"
local fes = "pca01_id year#stdt"
local vce = "cluster stdtbk"
local bw = 100
local row = 3

reghdfe `yvar' ///
	1.D#2011.year#1.H 1.D#2011.year#0.H c.X#i.year c.DX#i.year if inrange(tot_p01,300-`bw',300+`bw') & ///
	pop_mismatch20==0 & lights_diff<20 [aw=aw`bw'], a(`fes') vce(`vce') 
	
	replace yvar = "`yvar'" in `row'
	replace fes = "`fes'" in `row'
	replace vce = "`vce'" in `row'
	replace bw = `bw' in `row'
	replace beta_hi = _b[1.D#2011.year#1.H] in `row'
	replace se_hi = _se[1.D#2011.year#1.H] in `row'	
	replace tscore_hi = beta/se in `row'
	replace pvalue_hi = 2*ttail(e(df_r),abs(_b[1.D#2011.year#1.H]/_se[1.D#2011.year#1.H])) in `row'
	replace ci95_lo_hi = beta - se*invttail(e(df_r),0.025) in `row'
	replace ci95_hi_hi = beta + se*invttail(e(df_r),0.025) in `row'
	replace beta_lo = _b[1.D#2011.year#0.H] in `row'
	replace se_lo = _se[1.D#2011.year#0.H] in `row'	
	replace tscore_lo = beta/se in `row'
	replace pvalue_lo = 2*ttail(e(df_r),abs(_b[1.D#2011.year#0.H]/_se[1.D#2011.year#0.H])) in `row'
	replace ci95_lo_lo = beta - se*invttail(e(df_r),0.025) in `row'
	replace ci95_hi_lo = beta + se*invttail(e(df_r),0.025) in `row'
	test 1.D#2011.year#1.H=1.D#2011.year#0.H
	replace pval_equal = r(p) in `row'
	sum `yvar' if e(sample)							
	replace ymean = r(mean)	in `row'
	replace nobs = e(N)	in `row'
	replace nclust = e(N_clust) in `row'
	replace rmse = e(rmse) in `row'
	

	// District FEs, 150 bw, x power supply +/- 10 hours/day
local yvar = "vd_pwr_d_all"
local fes = "pca01_id year#stdt"
local vce = "cluster stdtbk"
local bw = 150
local row = 4

reghdfe `yvar' ///
	1.D#2011.year#1.H 1.D#2011.year#0.H c.X#i.year c.DX#i.year if inrange(tot_p01,300-`bw',300+`bw') & ///
	pop_mismatch20==0 & lights_diff<20 [aw=aw`bw'], a(`fes') vce(`vce') 
	
	replace yvar = "`yvar'" in `row'
	replace fes = "`fes'" in `row'
	replace vce = "`vce'" in `row'
	replace bw = `bw' in `row'
	replace beta_hi = _b[1.D#2011.year#1.H] in `row'
	replace se_hi = _se[1.D#2011.year#1.H] in `row'	
	replace tscore_hi = beta/se in `row'
	replace pvalue_hi = 2*ttail(e(df_r),abs(_b[1.D#2011.year#1.H]/_se[1.D#2011.year#1.H])) in `row'
	replace ci95_lo_hi = beta - se*invttail(e(df_r),0.025) in `row'
	replace ci95_hi_hi = beta + se*invttail(e(df_r),0.025) in `row'
	replace beta_lo = _b[1.D#2011.year#0.H] in `row'
	replace se_lo = _se[1.D#2011.year#0.H] in `row'	
	replace tscore_lo = beta/se in `row'
	replace pvalue_lo = 2*ttail(e(df_r),abs(_b[1.D#2011.year#0.H]/_se[1.D#2011.year#0.H])) in `row'
	replace ci95_lo_lo = beta - se*invttail(e(df_r),0.025) in `row'
	replace ci95_hi_lo = beta + se*invttail(e(df_r),0.025) in `row'
	test 1.D#2011.year#1.H=1.D#2011.year#0.H
	replace pval_equal = r(p) in `row'
	sum `yvar' if e(sample)							
	replace ymean = r(mean)	in `row'
	replace nobs = e(N)	in `row'
	replace nclust = e(N_clust) in `row'
	replace rmse = e(rmse) in `row'
	
	
keep yvar-rmse
dropmiss, obs force
assert _N==4
compress
save "$results/diff_in_disc_vdelec.dta"	, replace
	
}

********************************************************************************
********************************************************************************

** 2. Run VD diff-in-diff colllapsing to district level (by HH, to align with NSS)
{
use "$panel/panel_dataset_dd.dta", clear

egen double temp_denom = sum(no_hh), by(year st_code dt_code)
foreach v of varlist vd_pwr* {
	egen double temp = sum(no_hh * `v' / temp_denom), by(year st_code dt_code)
	replace `v' = temp
	drop temp
}
keep year st_code dt_code vd_pwr* treat_x_post
duplicates drop
unique year st_code dt_code
assert r(unique)==r(N)

sum vd_pwr_* if year==2001, detail
egen stdt = group(st_code dt_code)

preserve
use "$panel/panel_dataset_dd_nss.dta", clear
keep st_code dt_code exp05*
duplicates drop
tempfile nss
save `nss'
restore
merge m:1 st_code dt_code using `nss'


gen yvar = ""
gen fes = ""
gen vce = ""
gen beta = .
gen se = .
gen tscore = .
gen pvalue = .
gen ci95_lo = .
gen ci95_hi = .
gen ymean = .
gen nobs = .
gen nclust = .
gen rmse = .
local row = 1	

local vce = "cluster stdt"

foreach yvar of varlist vd_pwr* {
	foreach fe in 1 2 3  {
		
		if `fe'==1 {
			local fes = "year stdt"
		}
		else if `fe'==2 {
			local fes = "year stdt exp05_st_4ile#c.year exp05_ntl_10ile#c.year"
		}
		else if `fe'==3 {
			local fes = "year stdt exp05_st_4ile#c.year exp05_ntl_10ile#c.year st_code#c.year"
		}
	
		reghdfe `yvar' treat_x_post, a(`fes') vce(`vce')

		replace yvar = "`yvar'" in `row'
		replace fes = "`fes'" in `row'
		replace vce = "`vce'" in `row'
		replace beta = _b[treat_x_post] in `row'
		replace se = _se[treat_x_post] in `row'	
		replace tscore = beta/se in `row'
		replace pvalue = 2*ttail(e(df_r),abs(_b[treat_x_post]/_se[treat_x_post])) in `row'
		replace ci95_lo = beta - se*invttail(e(df_r),0.025) in `row'
		replace ci95_hi = beta + se*invttail(e(df_r),0.025) in `row'
		sum `yvar' if e(sample)							
		replace ymean = r(mean)	in `row'
		replace nobs = e(N)	in `row'
		replace nclust = e(N_clust) in `row'
		replace rmse = e(rmse) in `row'
		local row = `row' + 1

	}
}

keep yvar-rmse
dropmiss, obs force
assert _N==12
compress
save "$results/dd_vdelec_distlevel.dta", replace
	
}

********************************************************************************
********************************************************************************

** 3. Run VD diff-in-diff at village levels (pooled coefficient)
{
use "$panel/panel_dataset_dd.dta", clear

preserve
use "$panel/panel_dataset_dd_nss.dta", clear
keep st_code dt_code exp05*
duplicates drop
tempfile nss
save `nss'
restore
merge m:1 st_code dt_code using `nss'

gen yvar = ""
gen fes = ""
gen vce = ""
gen beta = .
gen se = .
gen tscore = .
gen pvalue = .
gen ci95_lo = .
gen ci95_hi = .
gen ymean = .
gen nobs = .
gen nclust = .
gen rmse = .
local row = 1	

local vce = "cluster stdt"

foreach yvar of varlist vd_pwr* {
	
	foreach fe in 1 2 3  {
		
		if `fe'==1 {
			local fes = "year pca01_id"
		}
		else if `fe'==2 {
			local fes = "year pca01_id exp05_st_4ile#c.year exp05_ntl_10ile#c.year"
		}
		else if `fe'==3 {
			local fes = "year pca01_id exp05_st_4ile#c.year exp05_ntl_10ile#c.year st_code#c.year"
		}
		
		reghdfe `yvar' treat_x_post, a(`fes') vce(`vce')

		replace yvar = "`yvar'" in `row'
		replace fes = "`fes'" in `row'
		replace vce = "`vce'" in `row'
		replace beta = _b[treat_x_post] in `row'
		replace se = _se[treat_x_post] in `row'	
		replace tscore = beta/se in `row'
		replace pvalue = 2*ttail(e(df_r),abs(_b[treat_x_post]/_se[treat_x_post])) in `row'
		replace ci95_lo = beta - se*invttail(e(df_r),0.025) in `row'
		replace ci95_hi = beta + se*invttail(e(df_r),0.025) in `row'
		sum `yvar' if e(sample)							
		replace ymean = r(mean)	in `row'
		replace nobs = e(N)	in `row'
		replace nclust = e(N_clust) in `row'
		replace rmse = e(rmse) in `row'
		local row = `row' + 1

	}
}

keep yvar-rmse
dropmiss, obs force
assert _N==12
compress
save "$results/dd_vdelec_pooled.dta", replace
	
}

********************************************************************************
********************************************************************************

** 4. Run VD diff-in-diff at village levels (population bin coefficients)
{
use "$panel/panel_dataset_dd.dta", clear

preserve
use "$panel/panel_dataset_dd_nss.dta", clear
keep st_code dt_code exp05*
duplicates drop
tempfile nss
save `nss'
restore
merge m:1 st_code dt_code using `nss'

	// Create 500-person population bins (switching from 300 to 500 to align with NSS population medians)
drop popbin*
gen popbin = 0
local bin = 500
local bmax = 3000
local bmax2 = 2900	
forvalues i = 0(`bin')`bmax2' {
  local iplus1 = `i' + `bin'
  gen popbin_`i'_`iplus1' = tot_p01>`i' & tot_p01<=`iplus1'
  la var popbin_`i'_`iplus1' "2001 population bin: (`i',`iplus1']"
  replace popbin = `i' if popbin_`i'_`iplus1'==1
}
gen popbin_`bmax'_max = tot_p01>`bmax'
replace popbin = `bmax' if popbin_`bmax'_max==1
la var popbin "Lower end of 2001 population bin"

	// DD population bins
drop dd_x_popbin*
forvalues i = 0(`bin')`bmax2' {
  local iplus1 = `i' + `bin'
  gen dd_x_popbin_`i'_`iplus1' = treat_x_post * popbin_`i'_`iplus1'
  la var dd_x_popbin_`i'_`iplus1' "DD treat x popbin: `i'-`iplus1'"
}
gen dd_x_popbin_`bmax'_max = treat_x_post * popbin_`bmax'_max
la var dd_x_popbin_`bmax'_max "DD treat x popbin `bmax' to infinity"



gen yvar = ""
gen fes = ""
gen vce = ""
forvalues p = 0(`bin')`bmax' {
	local p1 = `p'+`bin'
	if `p'==`bmax' {
		local p1 = "max"
	}
	gen beta_`p'_`p1' = .
	gen se_`p'_`p1' = .
	gen tscore_`p'_`p1' = .
	gen pvalue_`p'_`p1' = .
	gen ci95_lo_`p'_`p1' = .
	gen ci95_hi_`p'_`p1' = .
}
gen ymean = .
gen nobs = .
gen nclust = .
gen rmse = .
local row = 1	

local vce = "cluster stdt"

foreach yvar of varlist vd_pwr* {
	
	foreach fe in 1 2 3  {
		
		if `fe'==1 {
			local fes = "year pca01_id"
		}
		else if `fe'==2 {
			local fes = "year#popbin pca01_id exp05_st_4ile#c.year exp05_ntl_10ile#c.year"
		}
		else if `fe'==3 {
			local fes = "year#popbin pca01_id exp05_st_4ile#c.year exp05_ntl_10ile#c.year st_code#c.year"
		}
		
		reghdfe `yvar' dd_x_popbin_*, a(`fes') vce(`vce')

		replace yvar = "`yvar'" in `row'
		replace fes = "`fes'" in `row'
		replace vce = "`vce'" in `row'
		forvalues p = 0(`bin')`bmax' {	
			local p1 = `p'+`bin'
			if `p'==`bmax' {
				local p1 = "max"
			}
			qui replace beta_`p'_`p1' = _b[dd_x_popbin_`p'_`p1'] if _n==`row'
			qui replace se_`p'_`p1' = _se[dd_x_popbin_`p'_`p1'] if _n==`row'
			qui replace tscore_`p'_`p1' = beta_`p'_`p1'/se_`p'_`p1' in `row'
			qui replace pvalue_`p'_`p1' = 2*ttail(e(df_r),abs(_b[dd_x_popbin_`p'_`p1']/_se[dd_x_popbin_`p'_`p1'])) in `row'
			qui replace ci95_lo_`p'_`p1' = beta_`p'_`p1' - se_`p'_`p1'*invttail(e(df_r),0.025) in `row'
			qui replace ci95_hi_`p'_`p1' = beta_`p'_`p1' + se_`p'_`p1'*invttail(e(df_r),0.025) in `row'
		}
		sum `yvar' if e(sample)							
		replace ymean = r(mean)	in `row'
		replace nobs = e(N)	in `row'
		replace nclust = e(N_clust) in `row'
		replace rmse = e(rmse) in `row'
		local row = `row' + 1

	}
}

keep yvar-rmse
dropmiss, obs force
assert _N==12
compress
save "$results/dd_vdelec_popbins.dta", replace
	
}

********************************************************************************
********************************************************************************
